###
#####LOAD NECESSARY FILES AND PACKAGES
library(foreign)
library(haven)
library(survey)
library(readxl)
library(stringr)
#Set working directory and populate lists of surveys
setwd("/Data/")
# `pattern` is a regular expression, not a shell glob: anchor on the literal
# extension so only names that END in ".DTA" / ".csv" are listed. Later code
# derives the survey name by dropping the last four characters of each entry.
listDHS <- dir(pattern = "\\.DTA$")
listMICS <- dir(pattern = "\\.csv$")
#Load spreadsheet of variable names, then separate between DHS and MICS files
variables <- read_excel("Variables3.xls")
# which() drops rows whose DHSorMICS is NA instead of returning all-NA rows.
variablesDHS <- variables[which(variables$DHSorMICS == "DHS"), ]
variablesMICS <- variables[which(variables$DHSorMICS == "MICS"), ]
remove(variables)
#####
###





###
#####
#####LOAD AND PROCESS DHS FILES
# For every Stata file in listDHS: read it, look up its row in variablesDHS,
# build the harmonised columns listed in `keep`, drop rows with no EverFGMC
# value, and finally stack all surveys into `final` and write DHSmerge.dta.

###
#####HELPERS
# TRUE when `name` is a single non-missing string that names a column of `df`.
# Uses scalar && so a NULL/NA name short-circuits instead of reaching `if`
# with a zero-length or NA condition.
has_column <- function(df, name) {
  !is.null(name) && length(name) == 1L && !is.na(name) && name %in% names(df)
}

# Return column `name` of `df` (exact match only), or stop with a message
# naming the survey file and the role the column plays.
required_column <- function(df, name, role, survey) {
  if (!has_column(df, name)) {
    stop("Survey '", survey, "': column '", name, "' (", role,
         ") not found in the data", call. = FALSE)
  }
  df[[name]]
}

# Return paste0(prefix, df[[name]]) when the column exists, otherwise a single
# NA. The raw column is read BEFORE the caller assigns the result, so an output
# column that shares its name with the raw column (e.g. a raw `caseid`) is not
# wiped out first.
prefixed_column <- function(df, name, prefix) {
  if (!has_column(df, name)) {
    return(NA)
  }
  paste0(prefix, df[[name]])
}

# Recode shared by MLIR32FL, BFIR31FL, CIIR3AFL and TZIR3AFL:
# 0 -> NA, 3 -> 2, 6 -> 4, 96 -> NA; every other value is left unchanged.
recode_type_standard <- function(x) {
  x <- ifelse(x == 0, NA, x)
  x <- ifelse(x == 3, 2, x)
  x <- ifelse(x == 6, 4, x)
  ifelse(x == 96, NA, x)
}
#####
###

if (length(listDHS) == 0) {
  stop("No DHS files found in ", getwd(), call. = FALSE)
}

#Columns carried into the merged file
keep <- c("CountryName", "Interview.Year", "Year", "DHS", "Phase", "FileName", "EverMarried", "SampleSize", "Population", "SurveyWeight", "caseid", "cluster", "PSU", "SSN", "Interviewer", "Supervisor", "EverFGMC", "AgeFGMC", "AgeFGMC90", "AgeFGMC95", "FGMCtype", "FGMCtype2", "Region", "Religion", "Ethnicity", "Age", "BirthYear", "BirthYearAge", "Childhood", "Urban")

# One slot per survey; bound together once after the loop rather than growing
# `final` with rbind() on every iteration.
surveys <- vector("list", length(listDHS))

for (loop in seq_along(listDHS)) {
  survey_file <- listDHS[loop]
  data <- read.dta(survey_file, convert.factors = FALSE)
  # Survey name = file name without its four-character extension (".DTA").
  indicator <- which(variablesDHS$Filename == substring(survey_file, 1, nchar(survey_file) - 4))
  if (length(indicator) != 1) {
    stop("Expected exactly one row of variablesDHS for '", survey_file,
         "', found ", length(indicator), call. = FALSE)
  }
  spec <- variablesDHS[indicator, ]

  ###
  #####CREATE SURVEY MECHANICS AND RESPONDENT IDENTIFICATION VARIABLES
  data$CountryName <- spec$Country
  data$Interview.Year <- required_column(data, spec$Interview.Year, "Interview.Year", survey_file)
  data$Year <- spec$Year
  data$DHS <- as.numeric(spec$DHSorMICS == "DHS")
  data$Phase <- spec$Phase
  data$FileName <- spec$Filename
  data$EverMarried <- spec$EverMarried
  data$SampleSize <- spec$SampleSize
  data$Population <- spec$Women1549WPP2017
  # The raw weight is scaled down by 1,000,000.
  data$SurveyWeight <- required_column(data, spec$Weight, "Weight", survey_file) / 1000000
  # Identifiers are prefixed with the survey name; NA when the survey has no
  # such column.
  data$caseid <- prefixed_column(data, spec$CaseID, spec$Filename)
  data$Interviewer <- prefixed_column(data, spec$Interviewer, spec$Filename)
  data$Supervisor <- prefixed_column(data, spec$Supervisor, spec$Filename)
  data$PSU <- prefixed_column(data, spec$PSU, spec$Filename)
  data$SSN <- prefixed_column(data, spec$SSN, spec$Filename)
  data$cluster <- prefixed_column(data, spec$Cluster, spec$Filename)
  #####
  ###


  ###
  #####CREATE INDIVIDUAL-LEVEL COVARIATES#####
  data$Region <- prefixed_column(data, spec$Region, spec$Filename)
  data$Religion <- prefixed_column(data, spec$religion, spec$Filename)
  data$Ethnicity <- prefixed_column(data, spec$ethnicity, spec$Filename)

  #Recode age, recoding as "NA" any responses outside sample frame
  # Accepted range is 15-49, except EGIQ73FL where it is 15-59.
  data$Age <- as.numeric(trimws(as.character(required_column(data, spec$Age, "Age", survey_file))))
  max_age <- if (spec$Filename == "EGIQ73FL") 59 else 49
  data$Age <- ifelse(data$Age < 15 | data$Age > max_age, NA, data$Age)

  #Rename the birth-year variable, standardizing two-digit entries to four digits
  birth_raw <- required_column(data, spec$BirthYear, "BirthYear", survey_file)
  if (mean(nchar(birth_raw)) > 3) {
    data$BirthYear <- as.numeric(as.character(birth_raw))
  } else {
    data$BirthYear <- 1900 + as.numeric(as.character(birth_raw))
  }
  data$BirthYear <- ifelse(data$BirthYear < 1939 | data$BirthYear > 2003, NA, data$BirthYear)
  # BirthYearAge: the reported birth year when available, otherwise the survey
  # year minus age (NA when age is NA).
  survey_year <- as.numeric(trimws(as.character(spec$Year)))
  data$BirthYearAge <- survey_year - data$Age
  data$BirthYearAge <- ifelse(is.na(data$BirthYear), data$BirthYearAge, data$BirthYear)

  #Create "type of childhood residence" variable
  data$Childhood <- NA
  if (has_column(data, spec$childhood)) {
    data$Childhood <- data[[spec$childhood]]
    data$Childhood <- ifelse(data$Childhood == "capital, large city" | data$Childhood == "0", "0", data$Childhood)
    data$Childhood <- ifelse(data$Childhood == "city" | data$Childhood == "1", "1", data$Childhood)
    data$Childhood <- ifelse(data$Childhood == "town" | data$Childhood == "2", "2", data$Childhood)
    data$Childhood <- ifelse(data$Childhood == "countryside" | data$Childhood == "3", "3", data$Childhood)
    data$Childhood <- ifelse(data$Childhood == "abroad" | data$Childhood == "4", "4", data$Childhood)
    # Anything that is not one of the five codes becomes NA.
    data$Childhood <- ifelse(data$Childhood == "0" | data$Childhood == "1" | data$Childhood == "2" | data$Childhood == "3" | data$Childhood == "4", data$Childhood, NA)
    data$Childhood <- as.numeric(as.character(data$Childhood))
    #data$Childhood <- factor(data$Childhood, levels= c("0", "1", "2", "3", "4"), labels=c("Capital, Large City", "City", "Town", "Countryside", "Abroad"))
  }

  #Create urban/rural variable
  data$Urban <- NA
  if (has_column(data, spec$type.of.residence)) {
    data$Urban <- trimws(as.character(data[[spec$type.of.residence]]))
    data$Urban <- ifelse(data$Urban == "Urbain" | data$Urban == "Urban", "1", data$Urban)
    data$Urban <- ifelse(data$Urban == "Rural", "0", data$Urban)
    data$Urban <- ifelse(data$Urban != "0" & data$Urban != "1", NA, data$Urban)
    data$Urban <- as.numeric(data$Urban)
  }
  #data$Residence <- factor(data$Residence, levels=c("1","2"), labels=c("Urban","Rural"))
  #####
  ###


  ###
  #####CREATE DV AND AGE-OF-CUTTING VARIABLES
  #Recode each file's cutting variable as "EverFGMC" and convert to 0 (No) or 1 (Yes) for consistency
  data$EverFGMC <- required_column(data, spec$Question, "Question", survey_file)
  if (spec$Question == "a503" | spec$Question == "s227") {
    # In these two questions the value 2 is recoded to 0.
    data$EverFGMC <- ifelse(data$EverFGMC == 2, 0, data$EverFGMC)
  } else {
    # Everywhere else the values 2 and 3 are recoded to 1.
    data$EverFGMC <- ifelse(data$EverFGMC == 2 | data$EverFGMC == 3, 1, data$EverFGMC)
  }
  data$EverFGMC <- ifelse(data$EverFGMC != 0 & data$EverFGMC != 1, NA, data$EverFGMC)

  #Recode the Age-at-FGMC variable in each file, recoding for consistency.
  if (is.na(spec$AgeAtCutting)) {
    data$AgeFGMC <- NA
  } else if (spec$AgeAtCutting == "s125") {
    # s125 holds codes 1/2/3, mapped to 5/9/18; anything else becomes NA.
    data$AgeFGMC <- required_column(data, spec$AgeAtCutting, "AgeAtCutting", survey_file)
    data$AgeFGMC <- ifelse(data$AgeFGMC == 1, 5, data$AgeFGMC)
    data$AgeFGMC <- ifelse(data$AgeFGMC == 2, 9, data$AgeFGMC)
    data$AgeFGMC <- ifelse(data$AgeFGMC == 3, 18, data$AgeFGMC)
    data$AgeFGMC <- ifelse(data$AgeFGMC != 5 & data$AgeFGMC != 9 & data$AgeFGMC != 18, NA, data$AgeFGMC)
    data$AgeFGMC <- ifelse(data$AgeFGMC > data$Age, NA, data$AgeFGMC)
  } else {
    # Values 93-96 are recoded to 0 and values above 96 to NA.
    data$AgeFGMC <- required_column(data, spec$AgeAtCutting, "AgeAtCutting", survey_file)
    data$AgeFGMC <- ifelse(data$AgeFGMC >= 93 & data$AgeFGMC <= 96, 0, data$AgeFGMC)
    data$AgeFGMC <- ifelse(data$AgeFGMC > 96, NA, data$AgeFGMC)
    data$AgeFGMC <- ifelse(data$AgeFGMC > data$Age, NA, data$AgeFGMC)
  }

  #Create Weighted (within-survey) Age-of-FGMC Cutoffs at 90th and 95th percentiles
  data$AgeFGMC90 <- NA
  data$AgeFGMC95 <- NA
  if (!is.na(spec$AgeAtCutting)) {
    # Total weight of respondents with a known age at cutting.
    WeightSum <- sum(data$SurveyWeight[!is.na(data$AgeFGMC)])
    # order() puts NA ages last, so the thresholds are met on known ages first.
    age_order <- order(data$AgeFGMC)
    sorted_age <- data$AgeFGMC[age_order]
    sorted_weight <- data$SurveyWeight[age_order]
    running.total <- 0
    found90 <- FALSE
    # NOTE(review): the running total starts at row 2 (row 1's weight is never
    # added) and the cutoff reported is the age in row y - 1. Kept exactly as
    # before; confirm this offset is intended.
    # seq_len(n)[-1] is 2..n, and empty when there are fewer than two rows.
    for (y in seq_len(length(sorted_age))[-1]) {
      running.total <- running.total + sorted_weight[y]
      if (running.total > 0.9 * WeightSum && !found90) {
        data$AgeFGMC90 <- sorted_age[y - 1]
        found90 <- TRUE
      } else if (running.total > 0.95 * WeightSum) {
        data$AgeFGMC95 <- sorted_age[y - 1]
        break
      }
    }
  }
  #####
  ###


  ###
  #####TYPE-OF-CUTTING VARIABLES
  data$FGMCtype <- NA
  data$FGMCtype2 <- NA
  if (spec$Filename == "MLIR32FL") {
    data$FGMCtype <- recode_type_standard(data$s552)
  } else if (spec$Filename == "BFIR31FL") {
    data$FGMCtype <- recode_type_standard(data$s903)
    data$FGMCtype <- ifelse(data$s906 == 1, 3, data$FGMCtype)
    data$FGMCtype2 <- ifelse(data$s903 == 1 | data$s903 == 2 | data$s903 == 3, 1, NA)
  } else if (spec$Filename == "CIIR3AFL") {
    data$FGMCtype <- recode_type_standard(data$s903)
    data$FGMCtype <- ifelse(data$s906 == 1, 3, data$FGMCtype)
  } else if (spec$Filename == "TZIR3AFL") {
    data$FGMCtype <- recode_type_standard(data$s1003)
  } else if (spec$Filename == "GNIR41FL") {
    # Order matters: 1 and 2 are first moved to 5, and the final 5 -> 4 step
    # then maps them (and any original 5) to 4.
    data$FGMCtype <- data$v903
    data$FGMCtype <- ifelse(data$FGMCtype == 0, NA, data$FGMCtype)
    data$FGMCtype <- ifelse(data$FGMCtype == 1, 5, data$FGMCtype)
    data$FGMCtype <- ifelse(data$FGMCtype == 2, 5, data$FGMCtype)
    data$FGMCtype <- ifelse(data$FGMCtype == 3, 1, data$FGMCtype)
    data$FGMCtype <- ifelse(data$FGMCtype == 4, 2, data$FGMCtype)
    data$FGMCtype <- ifelse(data$FGMCtype == 6, 4, data$FGMCtype)
    data$FGMCtype <- ifelse(data$FGMCtype == 5, 4, data$FGMCtype)
    data$FGMCtype <- ifelse(data$v904 == 1, 3, data$FGMCtype)
  } else if (spec$Filename == "NIIR31FL") {
    data$FGMCtype <- data$s553
    data$FGMCtype <- ifelse(data$FGMCtype == 1, 4, data$FGMCtype)
    data$FGMCtype <- ifelse(data$FGMCtype == 2, 1, data$FGMCtype)
    data$FGMCtype <- ifelse(data$FGMCtype == 3, 2, data$FGMCtype)
    data$FGMCtype <- ifelse(data$FGMCtype == 8, NA, data$FGMCtype)
    data$FGMCtype <- ifelse(data$s556 == 1, 3, data$FGMCtype)
    data$FGMCtype2 <- ifelse(data$s557 == 1, 1, NA)
  } else if (spec$Filename == "SDIR02FL") {
    data$FGMCtype <- data$s228
    data$FGMCtype <- ifelse(data$FGMCtype == 2, 4, data$FGMCtype)
    data$FGMCtype <- ifelse(data$FGMCtype == 3, 2, data$FGMCtype)
    data$FGMCtype <- ifelse(data$FGMCtype == 1, 3, data$FGMCtype)
  } else if (spec$DHSorMICS == "MICS") {
    # variablesDHS is filtered to DHS rows when it is built, so this branch is
    # not expected to run in this loop.
    if (!is.na(spec$type.of.circumcision)) {
      if (spec$Filename == "SudanNorth2000wm") {
        data$FGMCtype <- trimws(as.character(data$hp4))
        data$FGMCtype <- ifelse(data$FGMCtype == "Pharonic", "3", data$FGMCtype)
        data$FGMCtype <- ifelse(data$FGMCtype == "Sounna", "2", data$FGMCtype)
        # NOTE(review): the previous filter kept only "1" and "2", which threw
        # away the "3" assigned to "Pharonic" two lines above. "3" is now kept
        # as well; confirm against the hp4 coding.
        data$FGMCtype <- ifelse(data$FGMCtype %in% c("1", "2", "3"), data$FGMCtype, NA)
      } else if (spec$Filename == "SudanSouth2000wm") {
        data$FGMCtype <- ifelse(data$hp2a == "Pharonic", "3", NA)
        data$FGMCtype <- ifelse(data$hp2b == "Sounna", "2", data$FGMCtype)
      } else {
        # A "yes" answer in fg6 (French or English spelling) is coded 3;
        # every other answer is NA.
        data$FGMCtype <- trimws(as.character(data$fg6))
        data$FGMCtype <- ifelse(data$FGMCtype %in% c("Oui", "oui", "Yes", "yes"), "3", NA)
      }
    }
  }
  data$FGMCtype <- as.numeric(data$FGMCtype)

  # The three follow-up questions below refine the type variables. Each is used
  # only when the spreadsheet names one of the listed columns; a value of 1 in
  # that column triggers the recode.
  flesh_col <- spec$flesh.or.genitals.removed
  if (!is.na(flesh_col) && flesh_col %in% c("fg104", "g103", "s1004")) {
    data$FGMCtype2 <- ifelse(data[[flesh_col]] == 1, 1, data$FGMCtype2)
  }

  nicked_col <- spec$genitals.carved.or.nicked
  if (!is.na(nicked_col) && nicked_col %in% c("fg105", "g104", "s1005")) {
    data$FGMCtype2 <- ifelse(data[[nicked_col]] == 1, 1, data$FGMCtype2)
  }

  sewn_col <- spec$vagina.was.sewed.or.closed
  if (!is.na(sewn_col) && sewn_col %in% c("fg106", "g105", "s1006", "s906", "s808")) {
    data$FGMCtype2 <- ifelse(data[[sewn_col]] == 1, 2, data$FGMCtype2)
    data$FGMCtype <- ifelse(data[[sewn_col]] == 1, 3, data$FGMCtype)
  }

  # Where FGMCtype2 is still missing, derive it from FGMCtype:
  # 1 -> 1, 2 -> 1, 3 -> 2, 4 -> 3.
  data$FGMCtype2 <- ifelse(is.na(data$FGMCtype2) & data$FGMCtype == 1, 1, data$FGMCtype2)
  data$FGMCtype2 <- ifelse(is.na(data$FGMCtype2) & data$FGMCtype == 2, 1, data$FGMCtype2)
  data$FGMCtype2 <- ifelse(is.na(data$FGMCtype2) & data$FGMCtype == 3, 2, data$FGMCtype2)
  data$FGMCtype2 <- ifelse(is.na(data$FGMCtype2) & data$FGMCtype == 4, 3, data$FGMCtype2)
  #data$FGMCtype <- factor(data$FGMCtype, levels= c("1", "2", "3", "4"), labels=c("Clitoridectomy", "Excision", "Infibulation", "Other"))
  #####
  ###


  ###
  #####SUBSET THE DATA#####
  # Keep only the harmonised columns and the rows with a non-missing EverFGMC.
  newdata <- data[keep]
  newdata <- newdata[!is.na(newdata$EverFGMC), ]
  surveys[[loop]] <- newdata
  #####
  ###


  ###
  #####
  #Print the loop number so we can check our rate of progress.
  Sys.sleep(0.0001)
  print(loop)
  flush.console()
  #####
  ###
}
#####
###
# Stack all surveys in one step, then write the merged file.
final <- do.call(rbind, surveys)
write.dta(final, "DHSmerge.dta")
